{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Final Project"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Import libraries\n",
    "import sys\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import sklearn as sk\n",
    "from os import listdir\n",
    "from os.path import isfile, join\n",
    "from timeit import default_timer as timer\n",
    "\n",
    "from sklearn.preprocessing import OneHotEncoder, LabelEncoder\n",
    "from six.moves import cPickle as pickle\n",
    "from six.moves import range\n",
    "\n",
    "import librosa\n",
    "import soundfile as sf\n",
    "from python_speech_features import mfcc\n",
    "from python_speech_features import logfbank"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# # if running file for the first time.\n",
    "# import tarfile\n",
    "# t = tarfile.open('UrbanSound8K.tar.gz', mode=\"r:gz\")\n",
    "# t.extractall()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>slice_file_name</th>\n",
       "      <th>fsID</th>\n",
       "      <th>start</th>\n",
       "      <th>end</th>\n",
       "      <th>salience</th>\n",
       "      <th>fold</th>\n",
       "      <th>classID</th>\n",
       "      <th>class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100032-3-0-0.wav</td>\n",
       "      <td>100032</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.317551</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>dog_bark</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100263-2-0-117.wav</td>\n",
       "      <td>100263</td>\n",
       "      <td>58.5</td>\n",
       "      <td>62.500000</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>children_playing</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100263-2-0-121.wav</td>\n",
       "      <td>100263</td>\n",
       "      <td>60.5</td>\n",
       "      <td>64.500000</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>children_playing</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>100263-2-0-126.wav</td>\n",
       "      <td>100263</td>\n",
       "      <td>63.0</td>\n",
       "      <td>67.000000</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>children_playing</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100263-2-0-137.wav</td>\n",
       "      <td>100263</td>\n",
       "      <td>68.5</td>\n",
       "      <td>72.500000</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>children_playing</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      slice_file_name    fsID  start        end  salience  fold  classID  \\\n",
       "0    100032-3-0-0.wav  100032    0.0   0.317551         1     5        3   \n",
       "1  100263-2-0-117.wav  100263   58.5  62.500000         1     5        2   \n",
       "2  100263-2-0-121.wav  100263   60.5  64.500000         1     5        2   \n",
       "3  100263-2-0-126.wav  100263   63.0  67.000000         1     5        2   \n",
       "4  100263-2-0-137.wav  100263   68.5  72.500000         1     5        2   \n",
       "\n",
       "              class  \n",
       "0          dog_bark  \n",
       "1  children_playing  \n",
       "2  children_playing  \n",
       "3  children_playing  \n",
       "4  children_playing  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# see see dataset orginization\n",
    "raw_sound = pd.read_csv('UrbanSound8K/metadata/UrbanSound8K.csv')\n",
    "raw_sound.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[-0.01174927  0.03039551]\n",
      " [-0.01153564  0.02471924]\n",
      " [-0.01644897  0.01794434]\n",
      " ..., \n",
      " [-0.00588989  0.00012207]\n",
      " [ 0.00314331  0.00585938]\n",
      " [ 0.00540161  0.00689697]]\n",
      "44100\n"
     ]
    }
   ],
   "source": [
    "# get sense of data\n",
    "data, samplerate = sf.read('UrbanSound8K/audio/fold1/102106-3-0-0.wav')\n",
    "print(data)\n",
    "print(samplerate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'3.5.2 |Anaconda 4.1.1 (64-bit)| (default, Jul  5 2016, 11:41:13) [MSC v.1900 64 bit (AMD64)]'"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fold_list = ['fold1', 'fold2', 'fold3', 'fold4', 'fold5', 'fold6', 'fold7', 'fold8', 'fold9', 'fold10']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def extract_feature(file_name: str) -> tuple:\n",
    "    \"\"\"\n",
    "    Extracts 193 chromatographic features from sound file. \n",
    "    including: MFCC's, Chroma_StFt, Melspectrogram, Spectral Contrast, and Tonnetz\n",
    "    NOTE: this extraction technique changes the time series nature of the data\n",
    "    \"\"\"\n",
    "    X, sample_rate = librosa.load(file_name)\n",
    "    stft = np.abs(librosa.stft(X))\n",
    "    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)\n",
    "    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)\n",
    "    mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)\n",
    "    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)\n",
    "    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)\n",
    "    return mfccs,chroma,mel,contrast,tonnetz"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time to extract features from one file: 0.034sec\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Anaconda3\\lib\\site-packages\\numpy\\core\\numeric.py:482: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future\n",
      "  return array(a, dtype, copy=False, order=order)\n"
     ]
    }
   ],
   "source": [
    "start_time = timer()\n",
    "a,b,c,d,e = extract_feature('UrbanSound8K/audio/fold1/102106-3-0-0.wav')\n",
    "end_time = timer()\n",
    "print('time to extract features from one file: {:.3f}sec'.format((end_time-start_time)/60))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(40,) (12,) (128,) (7,) (6,)\n",
      "-253.201139054\n",
      "193\n"
     ]
    }
   ],
   "source": [
    "print(a.shape,b.shape,c.shape,d.shape,e.shape)\n",
    "print(a[0])\n",
    "print(40+12+128+13)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "UrbanSound8K/audio/fold1/.DS_Store\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Anaconda3\\lib\\site-packages\\numpy\\core\\numeric.py:482: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future\n",
      "  return array(a, dtype, copy=False, order=order)\n",
      "C:\\Anaconda3\\lib\\site-packages\\librosa\\core\\pitch.py:148: UserWarning: Trying to estimate tuning from empty frequency set.\n",
      "  warnings.warn('Trying to estimate tuning from empty frequency set.')\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "UrbanSound8K/audio/fold2/.DS_Store\n",
      "UrbanSound8K/audio/fold3/.DS_Store\n",
      "UrbanSound8K/audio/fold4/.DS_Store\n",
      "UrbanSound8K/audio/fold5/.DS_Store\n",
      "UrbanSound8K/audio/fold6/.DS_Store\n",
      "UrbanSound8K/audio/fold7/.DS_Store\n",
      "UrbanSound8K/audio/fold8/.DS_Store\n",
      "UrbanSound8K/audio/fold9/.DS_Store\n",
      "UrbanSound8K/audio/fold10/.DS_Store\n",
      "Exceptions:  10\n",
      "time taken: 79.0 minutes 43.5 seconds\n",
      "None\n"
     ]
    }
   ],
   "source": [
    "mfcc_data = []\n",
    "exception_count = 0\n",
    "\n",
    "start_time = timer()\n",
    "for i in range(10):\n",
    "    # get file names\n",
    "    mypath = 'UrbanSound8K/audio/'+ fold_list[i] + '/'\n",
    "    files = [mypath + f for f in listdir(mypath) if isfile(join(mypath, f))]\n",
    "    \n",
    "    for fn in files:\n",
    "        try: # extract features\n",
    "            mfccs,chroma,mel,contrast,tonnetz = extract_feature(fn)\n",
    "            features = np.empty((0,193))\n",
    "            ext_features = np.hstack([mfccs,chroma,mel,contrast,tonnetz])\n",
    "            features = np.vstack([features,ext_features])\n",
    "            \n",
    "        except: # else exception (.ds_store files are part of mac file systems)\n",
    "            print(fn)\n",
    "            exception_count += 1\n",
    "            continue\n",
    "            \n",
    "        l_row = raw_sound.loc[raw_sound['slice_file_name']==fn.split('/')[-1]].values.tolist()\n",
    "        label = l_row[0][-1]\n",
    "        fold = i+1\n",
    "    \n",
    "        mfcc_data.append([features, features.shape, label, fold])\n",
    "        \n",
    "            #print(f,old_samplerate,ss)\n",
    "        \n",
    "print(\"Exceptions: \", exception_count)\n",
    "end_time = timer()\n",
    "print(print(\"time taken: {0} minutes {1:.1f} seconds\".format((end_time - start_time)//60, (end_time - start_time)%60)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>features</th>\n",
       "      <th>shape</th>\n",
       "      <th>label</th>\n",
       "      <th>fold</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[[-402.458131222, 92.2208198393, 19.3559458953...</td>\n",
       "      <td>(1, 193)</td>\n",
       "      <td>dog_bark</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                            features     shape     label  fold\n",
       "0  [[-402.458131222, 92.2208198393, 19.3559458953...  (1, 193)  dog_bark     1"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cols=[\"features\", \"shape\",\"label\", \"fold\"]\n",
    "mfcc_pd = pd.DataFrame(data = mfcc_data, columns=cols)\n",
    "mfcc_pd.head(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(8732,)"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Convert label to class number\n",
    "le = LabelEncoder()\n",
    "label_num = le.fit_transform(mfcc_pd[\"label\"])\n",
    "\n",
    "# one hot encode\n",
    "ohe = OneHotEncoder()\n",
    "onehot = ohe.fit_transform(label_num.reshape(-1, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "for i in range(10):\n",
    "    mfcc_pd[le.classes_[i]] = onehot[:,i].toarray()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>features</th>\n",
       "      <th>shape</th>\n",
       "      <th>label</th>\n",
       "      <th>fold</th>\n",
       "      <th>air_conditioner</th>\n",
       "      <th>car_horn</th>\n",
       "      <th>children_playing</th>\n",
       "      <th>dog_bark</th>\n",
       "      <th>drilling</th>\n",
       "      <th>engine_idling</th>\n",
       "      <th>gun_shot</th>\n",
       "      <th>jackhammer</th>\n",
       "      <th>siren</th>\n",
       "      <th>street_music</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[[-402.458131222, 92.2208198393, 19.3559458953...</td>\n",
       "      <td>(1, 193)</td>\n",
       "      <td>dog_bark</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                            features     shape     label  \\\n",
       "0  [[-402.458131222, 92.2208198393, 19.3559458953...  (1, 193)  dog_bark   \n",
       "\n",
       "   fold  air_conditioner  car_horn  children_playing  dog_bark  drilling  \\\n",
       "0     1              0.0       0.0               0.0       1.0       0.0   \n",
       "\n",
       "   engine_idling  gun_shot  jackhammer  siren  street_music  \n",
       "0            0.0       0.0         0.0    0.0           0.0  "
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mfcc_pd.head(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ll = [mfcc_pd['features'][i].ravel() for i in range(mfcc_pd.shape[0])]\n",
    "mfcc_pd['sample'] = pd.Series(ll, index=mfcc_pd.index)\n",
    "del mfcc_pd['features']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>shape</th>\n",
       "      <th>label</th>\n",
       "      <th>fold</th>\n",
       "      <th>air_conditioner</th>\n",
       "      <th>car_horn</th>\n",
       "      <th>children_playing</th>\n",
       "      <th>dog_bark</th>\n",
       "      <th>drilling</th>\n",
       "      <th>engine_idling</th>\n",
       "      <th>gun_shot</th>\n",
       "      <th>jackhammer</th>\n",
       "      <th>siren</th>\n",
       "      <th>street_music</th>\n",
       "      <th>sample</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>(1, 193)</td>\n",
       "      <td>dog_bark</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>[-402.458131222, 92.2208198393, 19.3559458953,...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      shape     label  fold  air_conditioner  car_horn  children_playing  \\\n",
       "0  (1, 193)  dog_bark     1              0.0       0.0               0.0   \n",
       "\n",
       "   dog_bark  drilling  engine_idling  gun_shot  jackhammer  siren  \\\n",
       "0       1.0       0.0            0.0       0.0         0.0    0.0   \n",
       "\n",
       "   street_music                                             sample  \n",
       "0           0.0  [-402.458131222, 92.2208198393, 19.3559458953,...  "
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mfcc_pd.head(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# for use in Networks with 193 features.ipynb\n",
    "pickle.dump(mfcc_pd, open('193_features.p','wb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# for use in SVM_RF_NB_final.ipynb\n",
    "mfcc_data[\"label_id\"] = label_num\n",
    "pickle.dump(mfcc_data, open('feature_data1.p','wb'))"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [Root]",
   "language": "python",
   "name": "Python [Root]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
